Table of contents

Check if all Prerequisites are satisfied

# Make sure the Poetry-managed virtual environment exists, and report whether
# a virtual environment is active for this session.
import os
import subprocess

# check if .venv is present if not run poetry install
if not os.path.exists(".venv"):
    try:
        # An argument list (no shell string) avoids shell parsing and gives us
        # the exit status, so success is only reported when it really happened.
        install = subprocess.run(["poetry", "install"], check=False)
    except FileNotFoundError:
        print("poetry executable not found; install Poetry, then run `poetry install`")
    else:
        if install.returncode == 0:
            print("poetry install ran")
        else:
            print(f"poetry install failed with exit code {install.returncode}")

# now check if .venv is activated
if not os.getenv("VIRTUAL_ENV"):
    # Running `source .venv/bin/activate` through a child shell only changes
    # that short-lived shell; it can never activate the environment for this
    # already-running interpreter. Tell the user what to do instead of
    # claiming the environment was activated.
    print(
        "No active virtual environment detected. Run `source .venv/bin/activate` "
        "in your shell (or select the .venv interpreter) and restart this session."
    )
poetry install ran
import os

# Load settings from a local .env file when one is present; otherwise just
# report that it is missing and carry on.
if os.path.exists(".env"):
    from dotenv import load_dotenv

    load_dotenv()  # take environment variables from .env.
    print("Loaded .env file")
else:
    # Use raise Exception("The .env file does not exist") if you want to raise an error instead
    print("The .env file does not exist")
The .env file does not exist
# Download the Cleantech media dataset from Kaggle into ./data/ unless that
# folder already exists.
import os
import shutil
import sys

# check if data folder exists
if not os.path.exists("./data/"):
    try:
        # NOTE(review): the kaggle package appears to authenticate at import
        # time -- confirm. Importing inside the try means a credentials error
        # raised that early still produces the hint below.
        import kaggle

        kaggle.api.authenticate()
    except ImportError:
        # A missing package is not a credentials problem; let it surface as is.
        raise
    except Exception:
        print(
            "Kaggle API not authenticated. Please add KAGGLE_username and KAGGLE_key to .env file"
        )
        # sys.exit instead of the site-provided exit() helper, and a non-zero
        # status because this is a failure path.
        sys.exit(1)

    os.makedirs('./data/', exist_ok=True)
    try:
        kaggle.api.dataset_download_files(
            "jannalipenkova/cleantech-media-dataset", path="./data/", unzip=True
        )
    except BaseException:
        # The folder was created just above. If the download fails or is
        # interrupted, remove it again; otherwise the existence check at the
        # top would skip the download on every later run.
        shutil.rmtree("./data/", ignore_errors=True)
        raise
import pandas as pd

Load Data

# Load both CSV exports from the local data folder.
# Media articles (title, date, author, content, domain, url):
df = pd.read_csv(
    filepath_or_buffer="./data/cleantech_media_dataset_v2_2024-02-23.csv",
)
# Evaluation set (question, relevant_chunk, article_url):
df_eval = pd.read_csv(
    filepath_or_buffer="./data/cleantech_rag_evaluation_data_2024-02-23.csv",
)
# Preview the first five article rows.
df.head(n=5)
Unnamed: 0 title date author content domain url
0 1280 Qatar to Slash Emissions as LNG Expansion Adva... 2021-01-13 NaN ["Qatar Petroleum ( QP) is targeting aggressiv... energyintel https://www.energyintel.com/0000017b-a7dc-de4c...
1 1281 India Launches Its First 700 MW PHWR 2021-01-15 NaN ["• Nuclear Power Corp. of India Ltd. ( NPCIL)... energyintel https://www.energyintel.com/0000017b-a7dc-de4c...
2 1283 New Chapter for US-China Energy Trade 2021-01-20 NaN ["New US President Joe Biden took office this ... energyintel https://www.energyintel.com/0000017b-a7dc-de4c...
3 1284 Japan: Slow Restarts Cast Doubt on 2030 Energy... 2021-01-22 NaN ["The slow pace of Japanese reactor restarts c... energyintel https://www.energyintel.com/0000017b-a7dc-de4c...
4 1285 NYC Pension Funds to Divest Fossil Fuel Shares 2021-01-25 NaN ["Two of New York City's largest pension funds... energyintel https://www.energyintel.com/0000017b-a7dc-de4c...
# Preview the first five rows of the evaluation set.
df_eval.head(n=5)
example_id question_id question relevant_chunk article_url
0 1 1 What is the innovation behind Leclanché's new ... Leclanché said it has developed an environment... https://www.sgvoice.net/strategy/technology/23...
1 2 2 What is the EU’s Green Deal Industrial Plan? The Green Deal Industrial Plan is a bid by the... https://www.sgvoice.net/policy/25396/eu-seeks-...
2 3 2 What is the EU’s Green Deal Industrial Plan? The European counterpart to the US Inflation R... https://www.pv-magazine.com/2023/02/02/europea...
3 4 3 What are the four focus areas of the EU's Gree... The new plan is fundamentally focused on four ... https://www.sgvoice.net/policy/25396/eu-seeks-...
4 5 4 When did the cooperation between GM and Honda ... What caught our eye was a new hookup between G... https://cleantechnica.com/2023/05/08/general-m...